"""
Example 1: download a web novel with requests and BeautifulSoup
Topics: network requests, HTML structure, and BeautifulSoup
"""

from concurrent.futures import ThreadPoolExecutor
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Table-of-contents page that links to every chapter of the novel.
menu = 'http://www.ouoou.com/ou_26238/'
# A timeout keeps the script from hanging forever on an unresponsive server.
res = requests.get(menu, timeout=30)
# Fail fast on an HTTP error instead of parsing an error page as the menu.
res.raise_for_status()
# Name the parser explicitly so the result does not depend on which optional
# parsers (lxml, html5lib) happen to be installed.
bs = BeautifulSoup(res.content, 'html.parser')

# Match by name: any <a> tag whose text contains "章" (chapter).
# NOTE: this shadows the built-in ``filter``; the name is kept so that any
# existing reference to it keeps working.
filter = lambda x: x.name == 'a' and '章' in x.text
# Simple alternative: search the whole document.
# chapters = bs.find_all(filter)

# Narrow the search through the DOM structure for a more precise match:
# only look inside the element with id="list".
ls = bs.select('#list')[0]
chapters = ls.find_all(filter)

print(len(chapters))  # 5930


# 开始下载
# Maps characters that cannot safely appear in a file name (path separators,
# Windows-reserved characters, control characters) to "_".
_UNSAFE_FILENAME_CHARS = str.maketrans(
    dict.fromkeys('\\/:*?"<>|\r\n\t\0', '_'))


def _safe_filename(title: str) -> str:
    """Return *title* with characters unsafe in file names replaced by "_"."""
    return title.translate(_UNSAFE_FILENAME_CHARS)


def download_and_save_one_chapter(a_tag):
    """Download one chapter page and save it as ``<title>.txt``.

    The file is written to the current working directory, UTF-8 encoded:
    the title, a blank line, then the paragraphs separated by blank lines.

    Args:
        a_tag: A link element supporting ``a_tag['href']``; the href is
            resolved against ``http://www.ouoou.com``.

    Raises:
        requests.RequestException: On a network failure, timeout or HTTP
            error status.
        AttributeError: If the page has no ``<h1>`` element.
        IndexError: If the page has no element with id ``content``.
    """
    # urljoin also copes with hrefs that are already absolute URLs.
    url = urljoin('http://www.ouoou.com', a_tag['href'])
    r = requests.get(url, timeout=30)
    r.raise_for_status()
    html = BeautifulSoup(r.content, 'html.parser')

    title: str = html.find('h1').text.strip()
    if not title.startswith('第'):
        title = '第' + title

    content: str = html.select('#content')[0].text
    content = (content.replace('\xa0', '')
               .replace('\r', '')
               .replace('</br>', '')
               .strip())
    # Splitting on whitespace yields one entry per paragraph.
    contents = content.split()
    # Drop a trailing fragment that starts with a full-width colon
    # (presumably a site footer rather than chapter text -- TODO confirm).
    # The emptiness check avoids an IndexError on a chapter with no text.
    if contents and contents[-1].startswith('：'):
        contents.pop()
    content = '\n\n'.join(contents)

    # The title comes from remote HTML, so sanitize it before using it as a
    # path; the heading written inside the file keeps the original title.
    with open(_safe_filename(title) + '.txt', 'w', encoding='utf-8') as f:
        f.write(title)
        f.write('\n\n')
        f.write(content)

# Download the chapters concurrently; each task spends its time waiting on
# the network, so threads let those waits overlap.
with ThreadPoolExecutor(max_workers=8) as executor:
    # submit() is used instead of map(): map() only re-raises a worker's
    # exception when its result iterator is consumed, so an unconsumed map()
    # silently discards every failed download.
    pending = [(a_tag, executor.submit(download_and_save_one_chapter, a_tag))
               for a_tag in chapters]
    for a_tag, future in pending:
        error = future.exception()  # blocks until this download has finished
        if error is not None:
            # Report and keep going so one bad chapter does not stop the rest.
            print(f'failed to download {a_tag.get("href")}: {error!r}')


